{
 "nbformat": 4,
 "nbformat_minor": 2,
 "metadata": {
  "language_info": {
   "name": "python",
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "version": "3.7.6-final"
  },
  "orig_nbformat": 2,
  "file_extension": ".py",
  "mimetype": "text/x-python",
  "name": "python",
  "npconvert_exporter": "python",
  "pygments_lexer": "ipython3",
  "version": 3,
  "kernelspec": {
   "name": "python37664bitwuhan2019ncovpipenv5f20d42ee8464e6b95cd78c3cee9d475",
   "display_name": "Python 3.7.6 64-bit ('Wuhan-2019-nCoV': pipenv)"
  }
 },
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>confirmed</th>\n      <th>suspected</th>\n      <th>dead</th>\n      <th>cured</th>\n      <th>date</th>\n      <th>country</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>2846</td>\n      <td>5796</td>\n      <td>81</td>\n      <td>58</td>\n      <td>2020-01-27</td>\n      <td>中国</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
      "text/plain": "   confirmed  suspected  dead  cured        date country\n0       2846       5796    81     58  2020-01-27      中国"
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 读取腾讯新闻实时统计数据\n",
    "import json\n",
    "from datetime import datetime\n",
    "import requests\n",
    "import pandas as pd\n",
    "\n",
    "cn_global_data = requests.get(\"https://view.inews.qq.com/g2/getOnsInfo?name=wuwei_ww_global_vars\").json()\n",
    "cn_global = json.loads(cn_global_data[\"data\"])\n",
    "cn_global_df = pd.DataFrame(cn_global)\n",
    "cn_global_df.drop(columns=[\"recentTime\", \"useTotal\", \"hintWords\"], inplace=True)\n",
    "cn_global_df.rename(columns={\n",
    "    \"area\": \"province\",\n",
    "    \"confirmCount\": \"confirmed\",\n",
    "    \"suspectCount\": \"suspected\",\n",
    "    \"cure\": \"cured\",\n",
    "    \"deadCount\": \"dead\"\n",
    "}, inplace=True)\n",
    "cn_global_df[\"date\"] = datetime.today().strftime('%Y-%m-%d')\n",
    "cn_global_df[\"country\"] = \"中国\"\n",
    "cn_global_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>country</th>\n      <th>province</th>\n      <th>city</th>\n      <th>confirmed</th>\n      <th>suspected</th>\n      <th>dead</th>\n      <th>cured</th>\n      <th>date</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>中国</td>\n      <td>湖北</td>\n      <td>武汉</td>\n      <td>698</td>\n      <td>0</td>\n      <td>63</td>\n      <td>42</td>\n      <td>2020-01-27</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>中国</td>\n      <td>湖北</td>\n      <td>黄冈</td>\n      <td>154</td>\n      <td>0</td>\n      <td>4</td>\n      <td>2</td>\n      <td>2020-01-27</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>中国</td>\n      <td>湖北</td>\n      <td>孝感</td>\n      <td>100</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>2020-01-27</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>中国</td>\n      <td>湖北</td>\n      <td>荆门</td>\n      <td>90</td>\n      <td>0</td>\n      <td>3</td>\n      <td>0</td>\n      <td>2020-01-27</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>中国</td>\n      <td>湖北</td>\n      <td>恩施州</td>\n      <td>25</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2020-01-27</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>296</th>\n      <td>中国</td>\n      <td>四川</td>\n      <td>宜宾</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2020-01-27</td>\n    </tr>\n    <tr>\n      <th>297</th>\n      <td>中国</td>\n      <td>四川</td>\n      <td>凉山</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2020-01-27</td>\n    </tr>\n    <tr>\n      <th>298</th>\n      <td>中国</td>\n      <td>陕西</td>\n      <td>渭南</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2020-01-27</td>\n    </tr>\n    <tr>\n      <th>299</th>\n      <td>中国</td>\n      <td>山东</td>\n      <td>枣庄</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2020-01-27</td>\n    </tr>\n    <tr>\n      <th>300</th>\n      <td>巴基斯坦</td>\n      <td></td>\n      <td></td>\n      <td>0</td>\n      <td>5</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2020-01-27</td>\n    </tr>\n  </tbody>\n</table>\n<p>301 rows × 8 columns</p>\n</div>",
      "text/plain": "    country province city  confirmed  suspected  dead  cured        date\n0        中国       湖北   武汉        698          0    63     42  2020-01-27\n1        中国       湖北   黄冈        154          0     4      2  2020-01-27\n2        中国       湖北   孝感        100          0     1      0  2020-01-27\n3        中国       湖北   荆门         90          0     3      0  2020-01-27\n4        中国       湖北  恩施州         25          0     0      0  2020-01-27\n..      ...      ...  ...        ...        ...   ...    ...         ...\n296      中国       四川   宜宾          1          0     0      0  2020-01-27\n297      中国       四川   凉山          1          0     0      0  2020-01-27\n298      中国       陕西   渭南          1          0     0      0  2020-01-27\n299      中国       山东   枣庄          2          0     0      0  2020-01-27\n300    巴基斯坦                        0          5     0      0  2020-01-27\n\n[301 rows x 8 columns]"
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 读取腾讯新闻实时分地区数据\n",
    "import json\n",
    "from datetime import datetime\n",
    "import requests\n",
    "import pandas as pd\n",
    "\n",
    "cn_area_data = requests.get(\"https://view.inews.qq.com/g2/getOnsInfo?name=wuwei_ww_area_counts\").json()\n",
    "cn_area = json.loads(cn_area_data[\"data\"])\n",
    "cn_area_df = pd.DataFrame(cn_area)\n",
    "cn_area_df.rename(columns={\n",
    "    \"area\": \"province\",\n",
    "    \"confirm\": \"confirmed\",\n",
    "    \"suspect\": \"suspected\",\n",
    "    \"heal\": \"cured\"\n",
    "}, inplace=True)\n",
    "cn_area_df[\"date\"] = datetime.today().strftime('%Y-%m-%d')\n",
    "cn_area_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>date</th>\n      <th>confirmed</th>\n      <th>suspected</th>\n      <th>dead</th>\n      <th>cured</th>\n      <th>country</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>2020-01-21</td>\n      <td>440</td>\n      <td>37</td>\n      <td>9</td>\n      <td>25</td>\n      <td>中国</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2020-01-13</td>\n      <td>41</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>中国</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2020-01-14</td>\n      <td>41</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>中国</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>2020-01-15</td>\n      <td>41</td>\n      <td>0</td>\n      <td>2</td>\n      <td>5</td>\n      <td>中国</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>2020-01-16</td>\n      <td>45</td>\n      <td>0</td>\n      <td>2</td>\n      <td>8</td>\n      <td>中国</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>2020-01-17</td>\n      <td>62</td>\n      <td>0</td>\n      <td>2</td>\n      <td>12</td>\n      <td>中国</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>2020-01-18</td>\n      <td>198</td>\n      <td>0</td>\n      <td>3</td>\n      <td>17</td>\n      <td>中国</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>2020-01-19</td>\n      <td>275</td>\n      <td>0</td>\n      <td>4</td>\n      <td>18</td>\n      <td>中国</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>2020-01-24</td>\n      <td>1287</td>\n      <td>1965</td>\n      <td>41</td>\n      <td>38</td>\n      <td>中国</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>2020-01-23</td>\n      <td>830</td>\n      <td>1072</td>\n      <td>25</td>\n      <td>34</td>\n      <td>中国</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>2020-01-22</td>\n      <td>571</td>\n      <td>393</td>\n      <td>17</td>\n      <td>0</td>\n      <td>中国</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>2020-01-20</td>\n      <td>291</td>\n      <td>54</td>\n      <td>6</td>\n      <td>25</td>\n      <td>中国</td>\n    </tr>\n    <tr>\n      <th>12</th>\n      <td>2020-01-25</td>\n      <td>1429</td>\n      <td>1965</td>\n      <td>42</td>\n      <td>38</td>\n      <td>中国</td>\n    </tr>\n    <tr>\n      <th>13</th>\n      <td>2020-01-26</td>\n      <td>2081</td>\n      <td>2684</td>\n      <td>56</td>\n      <td>49</td>\n      <td>中国</td>\n    </tr>\n    <tr>\n      <th>14</th>\n      <td>2020-01-27</td>\n      <td>2846</td>\n      <td>5794</td>\n      <td>81</td>\n      <td>58</td>\n      <td>中国</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
      "text/plain": "          date confirmed suspected dead cured country\n0   2020-01-21       440        37    9    25      中国\n1   2020-01-13        41         0    1     0      中国\n2   2020-01-14        41         0    1     0      中国\n3   2020-01-15        41         0    2     5      中国\n4   2020-01-16        45         0    2     8      中国\n5   2020-01-17        62         0    2    12      中国\n6   2020-01-18       198         0    3    17      中国\n7   2020-01-19       275         0    4    18      中国\n8   2020-01-24      1287      1965   41    38      中国\n9   2020-01-23       830      1072   25    34      中国\n10  2020-01-22       571       393   17     0      中国\n11  2020-01-20       291        54    6    25      中国\n12  2020-01-25      1429      1965   42    38      中国\n13  2020-01-26      2081      2684   56    49      中国\n14  2020-01-27      2846      5794   81    58      中国"
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 读取腾讯新闻日统计数据\n",
    "import json\n",
    "import requests\n",
    "import pandas as pd\n",
    "\n",
    "cn_day_data = requests.get(\"https://view.inews.qq.com/g2/getOnsInfo?name=wuwei_ww_cn_day_counts\").json()\n",
    "cn_day = json.loads(cn_day_data[\"data\"])\n",
    "cn_day_df = pd.DataFrame(cn_day)\n",
    "cn_day_df.rename(columns={\n",
    "    \"confirm\": \"confirmed\",\n",
    "    \"suspect\": \"suspected\",\n",
    "    \"heal\": \"cured\"\n",
    "}, inplace=True)\n",
    "cn_day_df[\"date\"] = cn_day_df[\"date\"].map(lambda x: \"2020-\" + x.replace(\".\", \"-\"))\n",
    "cn_day_df[\"country\"] = \"中国\"\n",
    "cn_day_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>date</th>\n      <th>country</th>\n      <th>countryCode</th>\n      <th>province</th>\n      <th>provinceCode</th>\n      <th>city</th>\n      <th>cityCode</th>\n      <th>confirmed</th>\n      <th>suspected</th>\n      <th>cured</th>\n      <th>dead</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>2020-01-13</td>\n      <td>中国</td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td>41</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2020-01-14</td>\n      <td>中国</td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td>41</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>2020-01-15</td>\n      <td>中国</td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td>41</td>\n      <td>0</td>\n      <td>5</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>2020-01-16</td>\n      <td>中国</td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td>45</td>\n      <td>0</td>\n      <td>8</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>2020-01-17</td>\n      <td>中国</td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td>62</td>\n      <td>0</td>\n      <td>12</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>220</th>\n      <td>2020-01-27</td>\n      <td>澳大利亚</td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td>5</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>216</th>\n      <td>2020-01-27</td>\n      <td>美国</td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td>5</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>218</th>\n      <td>2020-01-27</td>\n      <td>越南</td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>215</th>\n      <td>2020-01-27</td>\n      <td>韩国</td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td>4</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>224</th>\n      <td>2020-01-27</td>\n      <td>马来西亚</td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td></td>\n      <td>4</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n<p>334 rows × 11 columns</p>\n</div>",
      "text/plain": "           date country countryCode province provinceCode city cityCode  \\\n1    2020-01-13      中国                                                   \n2    2020-01-14      中国                                                   \n3    2020-01-15      中国                                                   \n4    2020-01-16      中国                                                   \n5    2020-01-17      中国                                                   \n..          ...     ...         ...      ...          ...  ...      ...   \n220  2020-01-27    澳大利亚                                                   \n216  2020-01-27      美国                                                   \n218  2020-01-27      越南                                                   \n215  2020-01-27      韩国                                                   \n224  2020-01-27    马来西亚                                                   \n\n     confirmed  suspected  cured  dead  \n1           41          0      0     1  \n2           41          0      0     1  \n3           41          0      5     2  \n4           45          0      8     2  \n5           62          0     12     2  \n..         ...        ...    ...   ...  \n220          5          0      0     0  \n216          5          0      0     0  \n218          2          0      0     0  \n215          4          0      0     0  \n224          4          0      0     0  \n\n[334 rows x 11 columns]"
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 更新数据\n",
    "import re\n",
    "import pandas as pd\n",
    "\n",
    "csv_file = \"Wuhan-2019-nCoV.csv\"\n",
    "json_file = \"Wuhan-2019-nCoV.json\"\n",
    "\n",
    "df = pd.read_csv(csv_file)\n",
    "df[\"date\"] = df[\"date\"].map(lambda x: \"-\".join([i.zfill(2) for i in re.split(\"\\\\D+\", x)]))\n",
    "df = pd.concat([df, cn_area_df, cn_global_df, cn_day_df], sort=False)\n",
    "df[\"country\"].fillna(\"\", inplace=True)\n",
    "df[\"countryCode\"].fillna(\"\", inplace=True)\n",
    "df[\"province\"].fillna(\"\", inplace=True)\n",
    "df[\"provinceCode\"].fillna(\"\", inplace=True)\n",
    "df[\"city\"].fillna(\"\", inplace=True)\n",
    "df[\"cityCode\"].fillna(\"\", inplace=True)\n",
    "df[\"confirmed\"] = df[\"confirmed\"].fillna(0).astype(int)\n",
    "df[\"suspected\"] = df[\"suspected\"].fillna(0).astype(int)\n",
    "df[\"cured\"] = df[\"cured\"].fillna(0).astype(int)\n",
    "df[\"dead\"] = df[\"dead\"].fillna(0).astype(int)\n",
    "df.drop_duplicates(subset=[\"date\", \"country\", \"province\", \"city\"], keep=\"last\", inplace=True)\n",
    "df.sort_values([\"date\", \"country\", \"province\", \"city\"], na_position=\"first\", inplace=True)\n",
    "df.to_csv(csv_file, index=False, encoding='utf-8')\n",
    "df.to_json(json_file, orient=\"records\", force_ascii=False)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import math\n",
    "from functools import lru_cache\n",
    "import pandas as pd\n",
    "\n",
    "csv_file = \"Wuhan-2019-nCoV.csv\"\n",
    "\n",
    "country_code = pd.read_csv(\"CountryCode.csv\")\n",
    "china_area_code = pd.read_csv(\"ChinaAreaCode.csv\")\n",
    "china_area_code[\"code\"] = china_area_code[\"code\"].astype(str)\n",
    "# china_area_code = china_area_code[china_area_code.apply(lambda x: bool(re.match(\"\\\\d{4}00$\", x.code)), axis=1)]\n",
    "china_area_code[\"is_province\"] = china_area_code[\"code\"].map(lambda x: bool(re.match(\"\\\\d{2}0000$\", x)))\n",
    "# china_area_code[\"city_code\"] = china_area_code[\"code\"].map(lambda x: re.sub(\"\\\\d{2}$\", \"00\", x))\n",
    "china_area_code[\"province_code\"] = china_area_code[\"code\"].map(lambda x: re.sub(\"\\\\d{4}$\", \"0000\", x))\n",
    "\n",
    "@lru_cache(maxsize = 128)\n",
    "def get_country_code(name):\n",
    "    result =  country_code.loc[country_code[\"name\"].isin([name])][\"code\"]\n",
    "    if (len(result.values) > 0):\n",
    "        return result.values[0]\n",
    "    return \"\"\n",
    "\n",
    "\n",
    "@lru_cache(maxsize = 32)\n",
    "def get_china_province_code(name):\n",
    "    if not name:\n",
    "        return \"\"\n",
    "    result = china_area_code.loc[china_area_code[\"is_province\"] & china_area_code[\"name\"].str.contains(name)][\"code\"]\n",
    "    if (len(result.values) > 0):\n",
    "        return result.values[0]\n",
    "    return \"\"\n",
    "\n",
    "\n",
    "# @lru_cache(maxsize = 1024)\n",
    "def get_china_city_code(province_code, name):\n",
    "    if not name or not province_code:\n",
    "        return \"\"\n",
    "    result = china_area_code.loc[china_area_code[\"province_code\"].isin([province_code]) & ~china_area_code[\"is_province\"] & china_area_code[\"name\"].str.contains(name)][\"code\"]\n",
    "    if (len(result.values) > 0):\n",
    "        return result.values[0]\n",
    "\n",
    "    for i in range(1, len(name)):\n",
    "        fuzzy_name = name[:-i] + \".*\" + \".*\".join(name[-i:])\n",
    "        result = china_area_code.loc[china_area_code[\"province_code\"].isin([province_code]) & ~china_area_code[\"is_province\"] & china_area_code[\"name\"].str.match(fuzzy_name)][\"code\"]\n",
    "        if (len(result.values) > 0):\n",
    "            print(f\"\"\"{province_code} {fuzzy_name} -> {\",\".join(result.values)}\"\"\")\n",
    "            return result.values[0]\n",
    "\n",
    "    return \"\"\n",
    "\n",
    "\n",
    "@lru_cache(maxsize = 1024)\n",
    "def get_china_area_name(code, name):\n",
    "    if not code:\n",
    "        return name\n",
    "    result = china_area_code.loc[china_area_code[\"code\"].isin([code])][\"name\"]\n",
    "    if (len(result.values) > 0):\n",
    "        return result.values[0]\n",
    "    return name\n",
    "\n",
    "df = pd.read_csv(csv_file)\n",
    "df[\"country\"].fillna(\"\", inplace=True)\n",
    "df[\"countryCode\"].fillna(\"\", inplace=True)\n",
    "df[\"province\"].fillna(\"\", inplace=True)\n",
    "df[\"provinceCode\"].fillna(\"\", inplace=True)\n",
    "df[\"city\"].fillna(\"\", inplace=True)\n",
    "df[\"cityCode\"].fillna(\"\", inplace=True)\n",
    "df[\"countryCode\"] = df[\"country\"].map(get_country_code)\n",
    "df[\"provinceCode\"] = df[\"province\"].map(get_china_province_code)\n",
    "df[\"province\"] = df.apply(lambda x: get_china_area_name(x.provinceCode, x.province), axis = 1)\n",
    "df[\"cityCode\"] = df.apply(lambda x: get_china_city_code(x.provinceCode, x.city), axis = 1)\n",
    "df[\"city\"] = df.apply(lambda x: get_china_area_name(x.cityCode, x.city), axis = 1)\n",
    "df.to_csv(\"temp.csv\", index=False)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "'220200'"
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_china_city_code(\"220000\", \"吉林\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "黔南\n"
    }
   ],
   "source": [
    "import math\n",
    "s = \"黔南州\"\n",
    "n = math.ceil(len(s) / 2)\n",
    "for i in range(1, n):\n",
    "    print(s[:-i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ]
}